#TODO:: MOVE
Load the raw output of your simulation here:
#Create 'read_and_aggregate': function reads simulation results and aggregates to the mean across all queens
read_and_aggregate = function(relative_path,cov_set_size, dataset, outcome_index, train_set_size){
df = readRDS(here::here(paste0( "/data/share/cdi/mliv/local repos/bigjobs/MLIV/", relative_path)))
# Aggregate performance (Bias, SE, RMSE) by queen
agg_perf_by_queen = aggregated_performance_by_queen(df)
# Reshape agg_perf_by_queen so metrics are the columns
wide_Ev_agg_perf_by_queen = pivot_wider(
data = agg_perf_by_queen,
id_cols = c("model", "queen" ),
names_from = metric,
values_from = Ev,
names_prefix = ""
)
# Order by model by queen
wide_Ev_agg_perf_by_queen <- wide_Ev_agg_perf_by_queen %>%
arrange(factor(model, levels = ALL_MODELS), factor(queen, levels = ALL_MODELS))
wide_Ev_agg_perf_by_queen$cov_set_size = cov_set_size
wide_Ev_agg_perf_by_queen$dataset = dataset
wide_Ev_agg_perf_by_queen$outcome_index = outcome_index
wide_Ev_agg_perf_by_queen$train_set_size = train_set_size
# Add type column
wide_Ev_agg_perf_by_queen = wide_Ev_agg_perf_by_queen %>%
mutate(type = case_when(
model == "ATE" ~ "ATE",
model == "OLS S" ~ "OLS S",
model %in% c("RF INF", "LASSO INF") ~ "INF",
model %in% c("RF T", "RF MOM IPW", "RF MOM DR", "CF", "CF LC") ~ "RF",
model %in% c("LASSO T", "LASSO MOM IPW", "LASSO MOM DR", "LASSO MCM", "LASSO MCM EA", "LASSO R") ~ "LASSO",
model == "CDML" ~ "CDML",
model %in% c("BART T", "BART S") ~ "BART",
model %in% c("XGBOOST S", "XGBOOST R") ~ "XGBOOST",
model %in% c("SL T", "SL S") ~ "SL",
TRUE ~ "Unknown"
))%>%
mutate(modelshort = case_when(
model %in% c("OLS S", "ATE", "CDML") ~ type,
.default = trimws(str_remove(model, type))))
return (wide_Ev_agg_perf_by_queen)
}#read in and aggregate files, across queens
asap_2_1000_small_noSL = read_and_aggregate(cov_set_size = "small", dataset = "asap", outcome_index = 2, train_set_size = "1000", relative_path = "results/aggregated_IATEs/asap/simulation_from_100224/aggregated_IATEs_data.rds")
#_1_1000_small_SL = read_and_aggregate(cov_set_size = "small", dataset = "asap", outcome_index = 2, train_set_size = "1000", relative_path = "results/aggregated_IATEs/ca/simulation_from_091024/aggregated_IATEs_data.rds") %>% filter(model %in% c("SL S", "SL T"))
asap_2_1000_medium_noSL = read_and_aggregate(cov_set_size = "medium", dataset = "asap", outcome_index = 2, train_set_size = "1000", relative_path = "results/aggregated_IATEs/asap/simulation_from_100824/aggregated_IATEs_data.rds")
#asap_2_1000_medium_SL = read_and_aggregate(cov_set_size = "medium", dataset = "asap", outcome_index = 2, train_set_size = "1000", relative_path = "results/aggregated_IATEs/ca/simulation_from_091124/aggregated_IATEs_data.rds")%>% filter(model %in% c("SL S", "SL T"))
asap_2_1000_large_noSL = read_and_aggregate(cov_set_size = "large", dataset = "asap", outcome_index = 2, train_set_size = "1000", relative_path = "results/aggregated_IATEs/asap/simulation_from_101124/aggregated_IATEs_data.rds")
#asap_2_1000_large_SL = read_and_aggregate(cov_set_size = "large", dataset = "asap", outcome_index = 2, train_set_size = "1000", relative_path = "results/aggregated_IATEs/ca/simulation_from_091624/aggregated_IATEs_data.rds")%>% filter(model %in% c("SL S", "SL T"))
asap_2_2000_small_noSL = read_and_aggregate(cov_set_size = "small", dataset = "asap", outcome_index = 2, train_set_size = "2000", relative_path = "results/aggregated_IATEs/asap/simulation_from_101024/aggregated_IATEs_data.rds")
#asap_2_2000_small_SL = read_and_aggregate(cov_set_size = "small", dataset = "asap", outcome_index = 2, train_set_size = "2000", relative_path = "results/aggregated_IATEs/ca/simulation_from_091824/aggregated_IATEs_data.rds")%>% filter(model %in% c("SL S", "SL T"))
asap_2_2000_medium_noSL = read_and_aggregate(cov_set_size = "medium", dataset = "asap", outcome_index = 2, train_set_size = "2000", relative_path = "results/aggregated_IATEs/asap/simulation_from_101124/aggregated_IATEs_data.rds")
#asap_2_2000_medium_SL = read_and_aggregate(cov_set_size = "medium", dataset = "asap", outcome_index = 2, train_set_size = "2000", relative_path = "results/aggregated_IATEs/ca/simulation_from_092424/aggregated_IATEs_data.rds")%>% filter(model %in% c("SL S", "SL T"))
asap_2_2000_large_noSL = read_and_aggregate(cov_set_size = "large", dataset = "asap", outcome_index = 2, train_set_size = "2000", relative_path = "results/aggregated_IATEs/asap/simulation_from_101424/aggregated_IATEs_data.rds")
#asap_2_2000_large_SL = read_and_aggregate(cov_set_size = "large", dataset = "asap", outcome_index = 2, train_set_size = "2000", relative_path = "results/aggregated_IATEs/ca/simulation_from_092524/aggregated_IATEs_data.rds")%>% filter(model %in% c("SL S", "SL T"))
#asap binary, small, 1000
#asap_1_1000_small_noSL = read_and_aggregate(cov_set_size = "small", dataset = "asap", outcome_index = 1, train_set_size = "1000", relative_path = "results/aggregated_IATEs/asap/simulation_from_100224/aggregated_IATEs_data.rds")
combined_all = rbind(asap_2_1000_small_noSL, #asap_1_1000_small_SL,
asap_2_1000_medium_noSL,# asap_1_1000_medium_SL,
asap_2_1000_large_noSL, #asap_1_1000_large_SL,
asap_2_2000_small_noSL, #asap_1_2000_small_SL,
asap_2_2000_medium_noSL,# asap_1_2000_medium_SL,
asap_2_2000_large_noSL #asap_1_2000_large_SL
) %>%
#specify order for cov_set_size
mutate(cov_set_size = factor(cov_set_size, levels = c("small", "medium", "large")))
write_xlsx(combined_all, path = here::here("graphs/combined/combined_all_asap_cont.xlsx")) #specifying axis maximums (x and y both) and breaks by datasets
#update this when adding new datasets/outcomes
scenario_maximums = rbind(
data.frame(dataset = "asap", outcome_index = "2", max = 15, breaks = I(list(c(0,2,4,6,8,10,12,14)))),
data.frame(dataset = "asap", outcome_index = "1", max = 1, breaks = I(list(c(0,.2,.4,.6,.8)))))
#define a function to make the euclidean_distance
make_euclidean_distance = function(max){
euclidean_distance = expand.grid(x = seq (0, max, length.out=50),y = seq (0, max, length.out=50))
euclidean_distance$z = with(euclidean_distance, sqrt(x^2 + y^2))
return(euclidean_distance)
}df = combined_all
df$set_id = paste( df$dataset, df$outcome_index, df$cov_set_size, df$train_set_size, sep="-" )
ALL_MODELS <- unique( df$model )
no_core <- function( df_set ) {
#df_set %>% filter( !model %in% c( "ATE", "OLS S", "LASSO INF", "RF INF" ) )
df_set %>% filter( !baseline )
}
only_core <- function( df_set ) {
#df_set %>% filter( model %in% c( "ATE", "OLS S", "LASSO INF", "RF INF" ) )
df_set %>% filter( baseline )
}
# Function to make RMSE contour plots ----
TYPE_SHAPE_MAP <- c(
"ATE" = 20, "OLS S" = 18,
"INF" = 2, "RF" = 3, "CDML" = 8,
"LASSO" = 10, "SL" = 16,
"XGBOOST" = 1, "BART" = 0
)
TYPE_COLOR_MAP <- c(
"ATE" = "black", "OLS S" = "black",
"INF" = "darkgrey", "RF" = "#E69F00", "CDML" = "#F0E442",
"LASSO" = "#009E73", "SL" = "#D55E00",
"XGBOOST" = "#CC79A7", "BART" = "#0072B2" # "#56B4E9"
)
LEGEND_COLORS = setdiff( names(TYPE_COLOR_MAP), c("LASSO INF", "RF INF", "ATE", "OLS S") )
# For individual methods
SHAPE_MAP <- c(
"ATE" = 0, "OLS S" = 1,
"RF INF" = 2, "RF T" = 3, "RF MOM IPW" = 4, "RF MOM DR" = 5,
"CF" = 6, "CF LC" = 7, "CDML" = 8, "LASSO INF" = 9, "LASSO T" = 10, "LASSO MOM IPW" = 11,
"LASSO MOM DR" = 12, "LASSO MCM" = 13, "LASSO MCM EA" = 14, "LASSO R" = 15,
"SL T" = 16, "SL S" = 17, "XGBOOST S" = 18, "XGBOOST R" = 19, "BART T" = 20, "BART S" = 21
)
old_type_shape_map <- c(
"ATE" = 18, "OLS S" = 20, "INF" = 2, "RF" = 3, "CDML" = 8, "LASSO" = 10, "SL" = 16, "XGBOOST" = 0, "BART" = 1
)
old_shape_map <- c(
"ATE" = 0, "OLS S" = 1, "RF INF" = 2, "RF T" = 3, "RF MOM IPW" = 4, "RF MOM DR" = 5,
"CF" = 6, "CF LC" = 7, "CDML" = 8, "LASSO INF" = 9, "LASSO T" = 10, "LASSO MOM IPW" = 11,
"LASSO MOM DR" = 12, "LASSO MCM" = 13, "LASSO MCM EA" = 14, "LASSO R" = 15,
"SL T" = 16, "SL S" = 17, "XGBOOST S" = 18, "XGBOOST R" = 19, "BART T" = 20, "BART S" = 21
)
contour_plot_base <- function( max_bias ,
max_se ,
breaks) {
euclidean_distance = expand.grid(x = seq( 0, 1.1*max_bias, length.out=100 ),
y = seq(0, 1.1*max_se, length.out=100 ) )
euclidean_distance$z = with(euclidean_distance, sqrt(x^2 + y^2))
plot_base <- list(
geom_contour(
data = euclidean_distance,
aes(x = x, y = y, z = z),
breaks = breaks
),
theme_minimal(),
theme(
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
text = element_text(color = "black"),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 14),
legend.title = element_text(size = 14),
legend.text = element_text(size = 12)
),
labs(
title = "Bias and SE, averaged across Queens",
subtitle = "* ATE queen is excluded",
col = "Model Type",
y = "Bias", # Change X-axis title
x = "SE" ), # Change Y-axis title
scale_shape_manual(values = TYPE_SHAPE_MAP),
scale_color_manual(values = TYPE_COLOR_MAP, breaks = LEGEND_COLORS),
guides(color = guide_legend(title = "Model Type"),
shape = guide_legend(title = "Model Type")),
coord_fixed()
)
plot_base
}
make_contour_plot <- function( df_set,
max_bias ,
max_se ,
breaks,
add_labels = TRUE ) {
p <- ggplot( df_set, aes(se, bias, xmax=max_se, ymax=max_bias) ) +
geom_point(aes(color = type, shape = type), size = 4 ) +
contour_plot_base( max_bias = max_bias, max_se = max_se , breaks= breaks)
if ( add_labels ) {
p <- p +
ggrepel::geom_label_repel(
aes(label = modelshort, col = type),
size = 3, # Increased label size
box.padding = 0.75,
point.padding = 0.25,
max.overlaps = Inf,
show.legend = FALSE
)
}
p
}df_agg$se = df_agg$SE
scenario_plot(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "1000", scen_cov_set_size = "small")## Scale for shape is already present.
## Adding another scale for shape, which will replace the existing scale.
scenario_plot(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "1000", scen_cov_set_size = "medium")## Scale for shape is already present.
## Adding another scale for shape, which will replace the existing scale.
scenario_plot(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "2000", scen_cov_set_size = "small")## Scale for shape is already present.
## Adding another scale for shape, which will replace the existing scale.
#set scen_max for asap outcome 2
scen_max = scenario_maximums%>%filter(dataset == "asap" & outcome_index == "2")%>% select(max) %>% unlist()
#use scen_max to create euclidean_distance
euclidean_distance=make_euclidean_distance(scen_max)
# Adjusting your plot
# Define shapes for each model type
train_size_shape_map = c("1000"=9, "2000"=2)
cov_set_size_color_map = c("small"="blue", "medium"="orange", "large" = "#696969")
usedata = df_agg %>%
filter(dataset =="asap" & outcome_index ==2)
p <- ggplot(usedata, aes(SE, bias, xmax=15, ymax=15, breaks=5)) +
geom_contour(
data = euclidean_distance,
aes(x = x, y = y, z = z),
breaks = c(2,4,6,8,10,12,14),
alpha = 0.6
) +
geom_point(aes(color =cov_set_size, shape=train_set_size ), size = 2) +
facet_wrap(~ model) + # Add facet_wrap
theme_minimal() +
scale_x_continuous(limits=c(0,15), breaks=c(0,5, 10))+
scale_y_continuous(breaks=c(0,5, 10))+
theme(
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
text = element_text(color = "black"),
plot.title = element_text(size = 12),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 14),
legend.title = element_text(size = 14),
legend.text = element_text(size = 12),
strip.text = element_text(size =8),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)
) +
labs(
title = "Mean of Aggregated Bias and SE across Queens\nby Algorithm with RMSE Contouring",
subtitle = "* ATE queen is excluded",
col = "Scenario"
) +
ylab("Bias") + # Change X-axis title
xlab("SE") + # Change Y-axis title
scale_color_manual(values=cov_set_size_color_map)+
scale_shape_manual(values = train_size_shape_map) +
guides(color = guide_legend(title = "Covariate Set Size"),
shape = guide_legend(title = "Train Set Size"))
p# Adjusting your plot
usedata = combined_all %>%
filter(dataset =="asap") %>%
filter(outcome_index ==2) %>%
filter(queen %in% c("ATE", "CDML"))
usedata$queensize = paste0(usedata$queen, ", ", usedata$train_set_size)
combined_shape_map = c("ATE, 1000"=0, "ATE, 2000"=1, "CDML, 1000" = 15, "CDML, 2000"=16)
cov_set_size_color_map = c("small"="blue", "medium"="orange", "large" = "#696969")
scen_max = scenario_maximums%>%filter(dataset == "asap" & outcome_index == "2")%>% select(max) %>% unlist()
#use scen_max to create euclidean_distance
euclidean_distance=make_euclidean_distance(scen_max)
p <- ggplot(usedata, aes(se, bias, xmax=15, ymax=15, breaks=5)) +
geom_contour(
data = euclidean_distance,
aes(x = x, y = y, z = z),
breaks = c(2, 4, 6, 8, 10,12,14),
alpha = 0.6
) +
geom_point(aes(color =cov_set_size, shape=queensize ), size = 2) +
facet_wrap(~ model) + # Add facet_wrap
theme_minimal() +
scale_x_continuous(limits=c(0,15), breaks=c(0,5, 10))+
scale_y_continuous(breaks=c(0,5, 10))+
theme(
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
text = element_text(color = "black"),
plot.title = element_text(size = 12),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 14),
legend.title = element_text(size = 14),
legend.text = element_text(size = 12),
strip.text = element_text(size =8),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)
) +
labs(
title = "Aggregated Bias and SE\nby Algorithm with RMSE Contouring",
subtitle = "ATE and CDML queens only",
col = "Scenario"
) +
ylab("Bias") + # Change X-axis title
xlab("SE") + # Change Y-axis title
scale_color_manual(values = cov_set_size_color_map) +
scale_shape_manual(values = combined_shape_map) +
guides(color = guide_legend(title = "Covariate Set Size"),
shape = guide_legend(title = "Train Set Size"))
p#create scenario_allqueens function: plot the results from each queen for a given model and scenario
scenario_allqueens = function(scen_dataset, scen_outcome_index, scen_train_set_size, scen_cov_set_size){
scen_max = scenario_maximums%>%filter(dataset == scen_dataset & outcome_index == scen_outcome_index)%>% select(max) %>% unlist()
max_bias = scen_max
max_se = scen_max
breaks = unlist(scenario_maximums%>%filter(dataset == scen_dataset & outcome_index == scen_outcome_index)%>% select(breaks))
#use scen_max to create euclidean_distance
euclidean_distance=make_euclidean_distance(scen_max)
usedata = combined_all[(combined_all$dataset ==scen_dataset & combined_all$outcome_index ==scen_outcome_index & combined_all$train_set_size==scen_train_set_size & combined_all$cov_set_size == scen_cov_set_size),]
p <- ggplot(usedata, aes(se, bias, xmax=scen_max, ymax=scen_max)) +
geom_contour(
data = euclidean_distance,
aes(x = x, y = y, z = z),
breaks = breaks,
alpha = 0.6
) +
geom_point(aes(color = queen, shape=queen), size = 2) +
scale_x_continuous(limits=c(0,scen_max))+
scale_y_continuous(limits=c(0,scen_max))+ #adding this to remove some outlier CDML
facet_wrap(~model) +
theme_minimal() +
theme(
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
text = element_text(color = "black"),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 14),
legend.title = element_text(size = 14),
legend.text = element_text(size = 12),
strip.text = element_text(size =8),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)
) +
labs(
title = "Aggregated Bias and SE by Queen\nfor Each Scenario with RMSE Contouring"
) +
ylab("Bias") + # Change X-axis title
xlab("SE") + # Change Y-axis title
scale_shape_manual(values = SHAPE_MAP) +
guides(color = guide_legend(title = "Queen"),
shape = guide_legend(title = "Queen"))
print(p)
# ggsave(here::here(paste(sep="_","graphs/combined/allqueens",scen_dataset, scen_outcome_index, scen_train_set_size, scen_cov_set_size, ".png")), p, width = 8, height = 6, units = "in", bg = 'white', limitsize=FALSE)
}scenario_allqueens(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "1000", scen_cov_set_size = "small")## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
scenario_allqueens(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "1000", scen_cov_set_size = "medium")## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
scenario_allqueens(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "1000", scen_cov_set_size = "large")## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
scenario_allqueens(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "2000", scen_cov_set_size = "small")## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
scenario_allqueens(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "2000", scen_cov_set_size = "medium")## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
scenario_allqueens(scen_dataset="asap", scen_outcome_index = "2", scen_train_set_size = "2000", scen_cov_set_size = "large")## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Make contour plot ----
df_agg$se = df_agg$SE
df_set_1 <- filter( df_agg, set_id == "asap-2-large-1000" )
df_set_2 <- filter( df_agg, set_id == "asap-2-large-2000" )
#max_bias = max( df_set_1$bias, df_set_2$bias )
#max_se = max( df_set_1$se, df_set_2$se )
scen_max = scenario_maximums%>%filter(dataset == "asap" & outcome_index == "2")%>% select(max) %>% unlist()
max_bias = scen_max
max_se = scen_max
breaks = unlist(scenario_maximums%>%filter(dataset == "asap" & outcome_index == "2")%>% select(breaks))
p1 <- make_contour_plot( df_set_1, max_bias=max_bias, max_se=max_se , breaks = breaks)
p1# Alternate labeling approaches? ----
p1B <- make_contour_plot( df_set_1, max_bias=max_bias, max_se=max_se, breaks = breaks, add_labels = FALSE )
p1B + ggrepel::geom_text_repel(
aes(label = modelshort, col = type),
size = 2, # Increased label size
max.overlaps = Inf,
show.legend = FALSE
)## [1] "ATE" "LASSO INF" "OLS S" "RF INF"
p1C <- ggplot( no_core( df_set_1 ), aes(se, bias, xmax=max_se, ymax=max_bias) ) +
contour_plot_base( max_bias = max_bias, max_se = max_se , breaks = breaks) +
geom_point(aes(color = type), size = 4, shape=19 ) +
geom_point( data = gbs, aes(pch=model), col="darkgrey", fill="darkgrey", size=4 ) +
# geom_point( data = gbs, size = 7, shape = 1, col="red" ) +
scale_shape_manual(breaks = c( "ATE", "OLS S", "LASSO INF", "RF INF" ),
values = c( 15, 23, 25, 24 ) ) +
guides(color = guide_legend(title = "Model Type"),
shape = guide_legend(title = "Reference") )## Scale for shape is already present.
## Adding another scale for shape, which will replace the existing scale.
p1C + ggrepel::geom_text_repel(
aes(label = modelshort, col = type),
size = 3, # Increased label size
max.overlaps = Inf,
min.segment.length = Inf,
show.legend = FALSE
)#ggsave( here::here( "graphs/possible_clean_plot_1.pdf" ) )
if ( FALSE ) {
# This isn't working
# Trying out animation ----
library(ggplot2)
library(gganimate)
df_range = bind_rows( set1 = df_set_1, set2 = df_set_2, .id = "set" ) %>%
# dplyr::select( -n, -rmse ) %>%
group_by( model, type, modelshort )
# Plot and animate
p <- ggplot( df_range, aes(se, bias, xmax=max_se, ymax=max_bias) ) +
geom_point(aes(color = type, shape=type), size = 4) +
transition_states( set, transition_length = 2, state_length = 1, wrap=FALSE) +
ease_aes('linear') +
labs(title = "Scenario: {closest_state}") +
contour_plot_base( max_bias = max_bias, max_se = max_se , breaks = breaks ) +
theme( legend.position = "none" )
p
anim <- animate(p, duration = 5, fps = 10, width = 600, height = 400)
gganimate::anim_save( here::here( "graphs/initial_animation.gif") )
# Extract the first frame
first_frame <- anim::anim_save(anim, start_pause = 1, nframes = 1)
# Extract the last frame
last_frame <- anim_save(anim, end_pause = 1, nframes = 1)
# Add labels to the first frame
first_frame_labeled <- ggplot_build(first_frame) +
ggrepel::geom_label_repel(
aes( label = after_stat(ifelse(set %in% c("set1", "set2"), modelshort, NA)), color = type),
size = 3, # Increased label size
box.padding = 0.75,
point.padding = 0.25,
max.overlaps = Inf,
show.legend = FALSE
)
geom_label_repel(aes(label = modelshort, color = type), size = 5)
# Add labels to the last frame
last_frame_labeled <- ggplot_build(last_frame) +
geom_label_repel(aes(label = modelshort, color = type), size = 5)
# You can now save or display these labeled frames
# ggsave("first_frame_with_labels.png", plot = first_frame_labeled)
# ggsave("last_frame_with_labels.png", plot = last_frame_labeled)
}
# "Trails from last plot" plot ----
d_b = df_set_2 %>%
ungroup() %>%
dplyr::select( -c( set_id:train_set_size ) ) %>%
left_join( dplyr::select( ungroup( df_set_1 ), model, bias, se ),
by = "model",
suffix = c( "", "old" ) )
plt <- ggplot( d_b, aes(se, bias, xmax=max_se, ymax=max_bias) ) +
geom_segment(aes(x = seold, y = biasold, xend = se, yend = bias),
arrow = arrow(length = unit(0.2, "cm")), lwd = 1, col="darkgrey") +
geom_point(aes(color = type, shape=type, size = as.numeric(3+1*baseline) ), fill="grey" ) +
contour_plot_base( max_bias = max_bias, max_se = max_se , breaks = breaks) +
labs( title = "Trails of Models from 1000 to 2000" ) +
scale_size_identity() +
guides( size="none" )
plt# A second version
pltB <- ggplot( d_b, aes(se, bias, xmax=max_se, ymax=max_bias) ) +
#geom_point(aes(color = type, shape=type, size = as.numeric(4+2*baseline) ), fill="grey" ) +
geom_segment(aes(x = seold, y = biasold, xend = se, yend = bias),
arrow = arrow(length = unit(0.2, "cm")), lwd = 1, col="lightgrey") +
geom_point( data = no_core( d_b ), aes(color = type), size = 4, shape=19 ) +
geom_point( data = only_core( d_b ), aes(pch=model), col="darkgrey", fill="darkgrey", size=4 ) +
contour_plot_base( max_bias = max_bias, max_se = max_se , breaks = breaks ) +
labs( title = "Trails of Models from 1000 to 2000" ) +
scale_size_identity() +
scale_shape_manual(breaks = c( "ATE", "OLS S", "LASSO INF", "RF INF" ),
values = c( 15, 23, 25, 24 ) ) +
guides(color = guide_legend(title = "Model Type"),
shape = guide_legend(title = "Reference"),
size="none" )## Scale for shape is already present.
## Adding another scale for shape, which will replace the existing scale.
pltB + ggrepel::geom_text_repel(
aes(label = modelshort, col = type),
size = 3, # Increased label size
max.overlaps = Inf,
min.segment.length = Inf,
show.legend = FALSE
)